######################################################################################
# Project:  Complexity Project
#
# Task:     Generate simulation results
#
# Author:   Gijs Schumacher
# Date:     November 2021
#
######################################################################################

library(faux)
library(ggplot2)
load("speeches.RData")


# Run regression analyses -------------------------------------------------
nsims <- 100

simulation <- function(r, dataset){
  simulated.dv <- replicate(n=1, rnorm_pre(dataset$complexity, r=r))
  vars1 <- as.formula(scale(simulated.dv) ~ scale(lr) + scale(pc) + year2 + office)
  pooled.analysis <- lm(vars1, data=dataset)
  coefs <- pooled.analysis$coefficients[3]
  #se <- sqrt(diag(vcov(pooled.analysis)))[3]
  p <- summary(pooled.analysis)[4][1]$coefficients[3,4]
  output <- c(coefs,p)
  print("Done!")
  return(output)
}

test.rs <- seq(0.25, 0.75, 0.25)

sv.r75 <- replicate(n=nsims, simulation(r=0.75, dataset=speeches[speeches$country=="Sweden" &  speeches$data.set=="parliamentary speeches",]))
sv.r5 <- replicate(n=nsims, simulation(r=0.5, dataset=speeches[speeches$country=="Sweden" &  speeches$data.set=="parliamentary speeches",]))
sv.r25 <- replicate(n=nsims, simulation(r=0.25, dataset=speeches[speeches$country=="Sweden" &  speeches$data.set=="parliamentary speeches",]))

uk.r75 <- replicate(n=nsims, simulation(r=0.75, dataset=speeches[speeches$country=="UK" &  speeches$data.set=="parliamentary speeches",]))
uk.r5 <- replicate(n=nsims, simulation(r=0.5, dataset=speeches[speeches$country=="UK" &  speeches$data.set=="parliamentary speeches",]))
uk.r25 <- replicate(n=nsims, simulation(r=0.25, dataset=speeches[speeches$country=="UK" &  speeches$data.set=="parliamentary speeches",]))

es.r75 <- replicate(n=nsims, simulation(r=0.75, dataset=speeches[speeches$country=="Spain" &  speeches$data.set=="parliamentary speeches",]))
es.r5 <- replicate(n=nsims, simulation(r=0.5, dataset=speeches[speeches$country=="Spain" &  speeches$data.set=="parliamentary speeches",]))
es.r25 <- replicate(n=nsims, simulation(r=0.25, dataset=speeches[speeches$country=="Spain" &  speeches$data.set=="parliamentary speeches",]))


de.r75 <- replicate(n=nsims, simulation(r=0.75, dataset=speeches[speeches$country=="Germany" &  speeches$data.set=="parliamentary speeches",]))
de.r5 <- replicate(n=nsims, simulation(r=0.5, dataset=speeches[speeches$country=="Germany" &  speeches$data.set=="parliamentary speeches",]))
de.r25 <- replicate(n=nsims, simulation(r=0.25, dataset=speeches[speeches$country=="Germany" &  speeches$data.set=="parliamentary speeches",]))

nl.r75 <- replicate(n=nsims, simulation(r=0.75, dataset=speeches[speeches$country=="Netherlands" &  speeches$data.set=="parliamentary speeches",]))
nl.r5 <- replicate(n=nsims, simulation(r=0.5, dataset=speeches[speeches$country=="Netherlands" &  speeches$data.set=="parliamentary speeches",]))
nl.r25 <- replicate(n=nsims, simulation(r=0.25, dataset=speeches[speeches$country=="Netherlands" &  speeches$data.set=="parliamentary speeches",]))

wh.r75 <- replicate(n=nsims, simulation(r=0.75, dataset=speeches))
wh.r5 <- replicate(n=nsims, simulation(r=0.5, dataset=speeches))
wh.r25 <- replicate(n=nsims, simulation(r=0.25, dataset=speeches))

betas <- c(sv.r75[1,],sv.r5[1,],sv.r25[1,],uk.r75[1,],uk.r5[1,],uk.r25[1,], 
           de.r75[1,],de.r5[1,],de.r25[1,],nl.r75[1,],nl.r5[1,],nl.r25[1,],
           es.r75[1,],es.r5[1,],es.r25[1,],wh.r75[1,],wh.r5[1,] ,wh.r25[1,]) 
ps  <- c(sv.r75[2,],sv.r5[2,],sv.r25[2,],uk.r75[2,],uk.r5[2,],uk.r25[2,], 
                de.r75[2,],de.r5[2,],de.r25[2,],nl.r75[2,],nl.r5[2,],nl.r25[2,],
                es.r75[2,],es.r5[2,],es.r25[2,],wh.r75[2,],wh.r5[2,] ,wh.r25[2,]) 

data <- data.frame(betas,ps)
data$country <- c(rep("Sweden",300),rep("United Kingdom",300),
                  rep("Germany",300), rep("Netherlands",300),
                  rep("Spain", 300), rep("Pooled Sample",300))
data$r <- rep(rep(c("r = 0.75", "r = 0.5", "r = 0.25"), each=100),6)

plot <- ggplot(data, aes(x=ps, y=betas)) +
  geom_point() +
  facet_grid(r~country) +
  geom_vline(xintercept=0.05, colour="red")

data$p.threshold <- ifelse(data$ps<0.05,1,0)

props <- aggregate(data$p.threshold, by=list(data$country, data$r), mean)
props$x <- paste0(round(props$x * 100,0),"%")
colnames(props) <- c("country", "r", "text")
props$text[1] <- paste0("% of p-values\n<.05=", props$text[1])


plot <- plot + geom_text(data=props, aes(x=0.8, y=-0.1, label=text)) + theme(axis.text.x = element_text(angle=90)) +
  xlab("Simulated p-values") + ylab("Simulated betas") + ggtitle("Simulated betas and p-values for 1000 simulations per facet")

ggsave(plot, file="simulation_plot.jpg", width=7, height=4.5)
